###Load required libraries
library(knitr)
library(DESeq2)
library(phyloseq)
library(ggplot2)
library(gridExtra)
library(reshape2)
library(vegan)
library(untb)
library(minpack.lm)
library(Hmisc)
###Import data
ddat.phy <- readRDS("~/Dropbox/oral-clinical-data-analysis-data/periodontology2000_hypo.RDS")
ddat.otu <- as.data.frame(otu_table(ddat.phy))
ddat.map <- as.data.frame(sample_data(ddat.phy))
ddat.tax <- as.data.frame(tax_table(ddat.phy))
#Calculate sampling depth
num.seqs <- rowSums(ddat.otu)
num.seqs <- as.data.frame(sort(num.seqs))
colnames(num.seqs) <- "num.seqs"
num.map <- merge(ddat.map, num.seqs, by.x=0, by.y=0, all=TRUE)
rownames(num.map) <- num.map$Row.names
num.map <- num.map[,-which(names(num.map) %in% c("Row.names"))]
###Sloan neutral community model
#Define neutral model fiting function
sncm.fit <- function(spp){
require(minpack.lm)
options(warn=-1)
N <- mean(apply(spp, 1, sum))
p <- apply(spp, 2, mean)/N
spp.bi <- 1*(spp>0)
freq <- apply(spp.bi, 2, mean)
d <- 1/N
m.fit <- nlsLM(freq ~ pbeta(d, N*m*p, N*m*(1-p), lower.tail=FALSE), start=list(m=0.1))
m.ci <- confint(m.fit, 'm', level=0.95)
#Calculate goodness-of-fit (R-squared and Root Mean Squared Error)
freq.pred <- pbeta(d, N*coef(m.fit)*p, N*coef(m.fit)*(1-p), lower.tail=FALSE)
Rsqr <- 1 - (sum((freq - freq.pred)^2))/(sum((freq - mean(freq))^2))
RMSE <- sqrt(sum((freq-freq.pred)^2)/(length(freq)-1))
#Results
fitstats <- data.frame(m=numeric(), m.ci=numeric(), Rsqr=numeric(), RMSE=numeric())
fitstats[1,] <- c(coef(m.fit), coef(m.fit)-m.ci[1], Rsqr, RMSE)
return(fitstats)
}
#Define function for predicted values
sncm.pred <- function(spp){
require(minpack.lm)
require(Hmisc)
options(warn=-1)
N <- mean(apply(spp, 1, sum))
p <- apply(spp, 2, mean)/N
spp.bi <- 1*(spp>0)
freq <- apply(spp.bi, 2, mean)
d <- 1/N
m.fit <- nlsLM(freq ~ pbeta(d, N*m*p, N*m*(1-p), lower.tail=FALSE), start=list(m=0.1))
#Calculate predicted values
freq.pred <- pbeta(d, N*coef(m.fit)*p, N*coef(m.fit)*(1-p), lower.tail=FALSE)
#Calculate residuals
pred.res <- freq - freq.pred
#Results
A <- cbind(p, freq, freq.pred, pred.res)
A <- as.data.frame(A)
colnames(A) <- c('p', 'freq', 'freq.pred', 'pred.res')
return(A)
}
###Rarefaction analysis
#Select maximum sampling depth and remove samples under that depth
min.seq <- 50000
ddatr.otu <- ddat.otu[rowSums(ddat.otu) >= min.seq, ]
ddatr.otu <- rrarefy(ddatr.otu, min.seq)
ddatr.otu <- ddatr.otu[,(colSums(ddatr.otu) != 0)]
ddatr.map <- ddat.map[(rownames(ddat.map) %in% rownames(ddatr.otu)),]
#Define rarefaction levels to analyze
rare.lvls <- seq.int(from=1000, to=50000, by=1000)
#Fit model at different rarefaction depths
ncm.stats <- data.frame(sncm_m=numeric(), sncm_m.ci=numeric(), sncm_Rsqr=numeric(), sncm_RMSE=numeric(), Rarefaction=numeric())
for(i in 1:length(rare.lvls)){
#Rarefy
otu.i <- rrarefy(ddatr.otu, rare.lvls[i])
otu.i <- otu.i[,(colSums(otu.i) != 0)]
#Fit models
A.i <- sncm.fit(otu.i)
fit.stats.i <- c(A.i$m, A.i$m.ci, A.i$Rsqr, A.i$RMSE, rare.lvls[i])
names(fit.stats.i) <- c('sncm_m', 'sncm_m.ci', 'sncm_Rsqr', 'sncm_RMSE', 'Rarefaction')
ncm.stats[i,] <- fit.stats.i
}
#Rarefy to 10,000 sequences per sample
min.seq <- 10000
otu.r <- ddat.otu[rowSums(ddat.otu) >= min.seq, ]
otu.r <- rrarefy(otu.r, min.seq)
otu.r <- otu.r[,(colSums(otu.r) != 0)]
map.r <- ddat.map[(rownames(ddat.map) %in% rownames(otu.r)),]
#Fit model by habitat (gingival and tooth aspect) within subject
ncm.stats <- data.frame(Metacommunity=character(), Subject=character(), Aim=character(), Habitat_Class=character(), Tooth_Aspect=character(), sncm_m=numeric(), sncm_m.ci=numeric(), sncm_Rsqr=numeric(), sncm_RMSE=numeric(), No.Samples=numeric(), stringsAsFactors=FALSE)
for(s in 1:length(unique(map.r$Subject))){
s.id <- unique(map.r$Subject)[s]
map.s <- subset(map.r, Subject == s.id)
aim.s <- unique(map.s$Aim)
for(h in 1:length(unique(map.s$Habitat_Class))){
habitat <- unique(map.s$Habitat_Class)[h]
map.h <- subset(map.s, Habitat_Class == habitat)
for(a in 1:length(unique(map.h$Tooth_Aspect))){
aspect <- unique(map.h$Tooth_Aspect)[a]
map.a <- subset(map.h, Tooth_Aspect == aspect)
otu.a <- otu.r[(rownames(otu.r) %in% rownames(map.a)),]
otu.a <- otu.a[,(colSums(otu.a) != 0)]
fit.a <- sncm.fit(otu.a)
meta.no <- (4*s) + (2*h) + (1*a) - 6 #Used the 'solve' function to determine coefficients
vec.a <- c(as.character(paste('M',meta.no, sep='')), as.character(s.id), as.character(aim.s), as.character(habitat), as.character(aspect), fit.a$m, fit.a$m.ci, fit.a$Rsqr, fit.a$RMSE, nrow(map.a))
names(vec.a) <- c('Metacommunity', 'Subject', 'Aim', 'Habitat_Class', 'Tooth_Aspect', 'sncm_m', 'sncm_m.ci', 'sncm_Rsqr', 'sncm_RMSE', 'No.Samples')
ncm.stats[meta.no,] <- vec.a
}
}
}
ncm.stats$Aim <- factor(ncm.stats$Aim, levels=c('SA1', 'SA3'))
ncm.stats$Habitat_Class <- factor(ncm.stats$Habitat_Class, levels=c('Supra', 'Sub'))
ncm.stats$Tooth_Aspect <- factor(ncm.stats$Tooth_Aspect, levels=c('Buccal', 'Lingual'))
#Fit model by habitat (gingival and tooth aspect) within subject and get predictions
ncm.preds <- data.frame(OTU=character(), Metacommunity=character(), Subject=character(), Aim=character(), Habitat_Class=character(), Tooth_Aspect=character(), sncm_m=numeric(), sncm_m.ci=numeric(), sncm_Rsqr=numeric(), sncm_RMSE=numeric(), No.Samples=numeric(), p=numeric(), freq=numeric(), freq.pred=numeric(), pred.res=numeric(), stringsAsFactors=FALSE)
ncm.preds[1,] <- rep(NA, times=length(ncol(ncm.preds)))
for(s in 1:length(unique(map.r$Subject))){
s.id <- unique(map.r$Subject)[s]
map.s <- subset(map.r, Subject == s.id)
aim.s <- unique(map.s$Aim)
for(h in 1:length(unique(map.s$Habitat_Class))){
habitat <- unique(map.s$Habitat_Class)[h]
map.h <- subset(map.s, Habitat_Class == habitat)
for(a in 1:length(unique(map.h$Tooth_Aspect))){
aspect <- unique(map.h$Tooth_Aspect)[a]
map.a <- subset(map.h, Tooth_Aspect == aspect)
otu.a <- otu.r[(rownames(otu.r) %in% rownames(map.a)),]
otu.a <- otu.a[,(colSums(otu.a) != 0)]
fit.a <- sncm.fit(otu.a)
pred.a <- sncm.pred(otu.a)
meta.no <- (4*s) + (2*h) + (1*a) - 6 #Used the 'solve' function to determine coefficients
df.a <- cbind(rownames(pred.a), rep(as.character(paste('M',meta.no, sep='')), times=nrow(pred.a)), rep(as.character(s.id), times=nrow(pred.a)), rep(as.character(aim.s), times=nrow(pred.a)), rep(as.character(habitat), times=nrow(pred.a)), rep(as.character(aspect), times=nrow(pred.a)), rep(fit.a$m, times=nrow(pred.a)), rep(fit.a$m.ci, times=nrow(pred.a)), rep(fit.a$Rsqr, times=nrow(pred.a)), rep(fit.a$RMSE, times=nrow(pred.a)), rep(nrow(map.a), times=nrow(pred.a)), pred.a)
colnames(df.a) <- c('OTU', 'Metacommunity', 'Subject', 'Aim', 'Habitat_Class', 'Tooth_Aspect', 'sncm_m', 'sncm_m.ci', 'sncm_Rsqr', 'sncm_RMSE', 'No.Samples', 'p', 'freq', 'freq.pred', 'pred.res')
ncm.preds <- rbind(ncm.preds, df.a)
}
}
}
ncm.preds <- ncm.preds[-1,]
#Combine with taxonomic calls
ncm.preds <- merge(ncm.preds, ddat.tax, by.x=1, by.y=0, all=FALSE)
ncm.preds$Aim <- factor(ncm.preds$Aim, levels=c('SA1', 'SA3'))
ncm.preds$Habitat_Class <- factor(ncm.preds$Habitat_Class, levels=c('Supra', 'Sub'))
ncm.preds$Tooth_Aspect <- factor(ncm.preds$Tooth_Aspect, levels=c('Buccal', 'Lingual'))
#Variance stabilization via DeSeq2 (From McMurdie)
#Function for geometric mean, set to zero when all coordinates are zero
geo_mean_protected <- function(x) {
if (all(x == 0)) {
return (0)
}
exp(mean(log(x[x != 0])))
}
ps_dds <- phyloseq_to_deseq2(ddat.phy, design = ~ Habitat_Class)
geoMeans <- apply(counts(ps_dds), 1, geo_mean_protected)
ps_dds <- estimateSizeFactors(ps_dds, geoMeans = geoMeans)
ps_dds <- estimateDispersions(ps_dds)
ddat.otu.vs <- t(getVarianceStabilizedData(ps_dds))
ddat.otu.vs[ddat.otu.vs<0] <- 0 #Replace negative values with 0
#Estimate migration rates using Gst method by habitat (gingival and tooth aspect) within subject
#On variance stabilized data:
cols <- c(colnames(ddat.map), 'I', 'm', 'No.Samples')
gst.param <- t(as.data.frame(rep(NA, times=length(cols)), row.names=cols))
for(s in 1:length(unique(ddat.map$Subject))){
s.id <- unique(ddat.map$Subject)[s]
map.s <- subset(ddat.map, Subject == s.id)
aim.s <- unique(map.s$Aim)
for(h in 1:length(unique(map.s$Habitat_Class))){
habitat <- unique(map.s$Habitat_Class)[h]
map.h <- subset(map.s, Habitat_Class == habitat)
for(a in 1:length(unique(map.h$Tooth_Aspect))){
aspect <- unique(map.h$Tooth_Aspect)[a]
map.a <- subset(map.h, Tooth_Aspect == aspect)
otu.a <- ddat.otu.vs[(rownames(ddat.otu.vs) %in% rownames(map.a)),]
otu.a <- otu.a[,(colSums(otu.a) != 0)]
gst.a <- as.data.frame(optimal.params.gst(D=t(otu.a), exact=FALSE, ci=FALSE))
gst.a <- cbind(gst.a, rep(nrow(map.a), times=nrow(map.a)))
colnames(gst.a) <- c('I', 'm', 'No.Samples')
df.a <- cbind(map.a, gst.a)
gst.param <- rbind(gst.param, df.a)
}
}
}
gst.param <- gst.param[-1,]
gst.param$Aim <- factor(gst.param$Aim, levels=c('SA1', 'SA3'))
gst.param$Habitat_Class <- factor(gst.param$Habitat_Class, levels=c('Supra', 'Sub'))
gst.param$Tooth_Aspect <- factor(gst.param$Tooth_Aspect, levels=c('Buccal', 'Lingual'))
#On rarefied data:
cols <- c(colnames(map.r), 'I', 'm', 'No.Samples')
gst.param.r <- t(as.data.frame(rep(NA, times=length(cols)), row.names=cols))
for(s in 1:length(unique(map.r$Subject))){
s.id <- unique(map.r$Subject)[s]
map.s <- subset(map.r, Subject == s.id)
aim.s <- unique(map.s$Aim)
for(h in 1:length(unique(map.s$Habitat_Class))){
habitat <- unique(map.s$Habitat_Class)[h]
map.h <- subset(map.s, Habitat_Class == habitat)
for(a in 1:length(unique(map.h$Tooth_Aspect))){
aspect <- unique(map.h$Tooth_Aspect)[a]
map.a <- subset(map.h, Tooth_Aspect == aspect)
otu.a <- otu.r[(rownames(otu.r) %in% rownames(map.a)),]
otu.a <- otu.a[,(colSums(otu.a) != 0)]
gst.a <- as.data.frame(optimal.params.gst(D=t(otu.a), exact=FALSE, ci=FALSE))
gst.a <- cbind(gst.a, rep(nrow(map.a), times=nrow(map.a)))
colnames(gst.a) <- c('I', 'm', 'No.Samples')
df.a <- cbind(map.a, gst.a)
gst.param.r <- rbind(gst.param.r, df.a)
}
}
}
gst.param.r <- gst.param.r[-1,]
gst.param.r$Aim <- factor(gst.param.r$Aim, levels=c('SA1', 'SA3'))
gst.param.r$Habitat_Class <- factor(gst.param.r$Habitat_Class, levels=c('Supra', 'Sub'))
gst.param.r$Tooth_Aspect <- factor(gst.param.r$Tooth_Aspect, levels=c('Buccal', 'Lingual'))
In order to determine whether microbial migration rates differed by tooth habitat, or by disease state, we fit the model separately to samples within each tooth habitat on an individual by individual basis.
The estimated migration rates by habitat type and disease state. Each open point represents the estimated migration rate across samples for each habitat within each individual. Solid points represent the average across individuals for that habitat, while error bars represent the standard error.
###Table XX: Analysis of variance for estimated migration rates across habitats and disease (Sloan model)
| Df | Sum Sq | Mean Sq | F value | Pr(>F) | |
|---|---|---|---|---|---|
| Habitat_Class | 1 | 0.0077036 | 0.0077036 | 6.1409980 | 0.0265661 |
| Tooth_Aspect | 1 | 0.0000068 | 0.0000068 | 0.0053846 | 0.9425422 |
| Cohort | 1 | 0.0066593 | 0.0066593 | 5.3084892 | 0.0370669 |
| Habitat_Class:Tooth_Aspect | 1 | 0.0000021 | 0.0000021 | 0.0016993 | 0.9677008 |
| Habitat_Class:Cohort | 1 | 0.0055293 | 0.0055293 | 4.4077261 | 0.0543876 |
| Tooth_Aspect:Cohort | 1 | 0.0000002 | 0.0000002 | 0.0001855 | 0.9893245 |
| Habitat_Class:Tooth_Aspect:Cohort | 1 | 0.0000026 | 0.0000026 | 0.0020345 | 0.9646600 |
| Residuals | 14 | 0.0175624 | 0.0012545 | NA | NA |
###Figure 3A (v2): Estimated migration rates of tooth habitats (Gst)
###Table XX: Analysis of variance for estimated migration rates across habitats and disease (Gst)
| Df | Sum Sq | Mean Sq | F value | Pr(>F) | |
|---|---|---|---|---|---|
| Habitat_Class | 1 | 0.0574987 | 0.0574987 | 1.3266536 | 0.2500076 |
| Tooth_Aspect | 1 | 0.1340551 | 0.1340551 | 3.0930216 | 0.0793023 |
| Cohort | 1 | 4.2313110 | 4.2313110 | 97.6280297 | 0.0000000 |
| Habitat_Class:Tooth_Aspect | 1 | 0.0211934 | 0.0211934 | 0.4889896 | 0.4847365 |
| Habitat_Class:Cohort | 1 | 0.3830325 | 0.3830325 | 8.8376183 | 0.0031078 |
| Tooth_Aspect:Cohort | 1 | 0.0053916 | 0.0053916 | 0.1243998 | 0.7244744 |
| Habitat_Class:Tooth_Aspect:Cohort | 1 | 0.0037767 | 0.0037767 | 0.0871386 | 0.7679818 |
| Residuals | 454 | 19.6768818 | 0.0433411 | NA | NA |
#Part 2: The aggregate fit of the neutral model by disease state and habitat For visualization purposes, the following plots show the fit of the model averaged across individuals. Note, however, that the analyses in the test and other figures were done by fitting the model to groups of samples within individuals.
###Figure 3B-E: Observed and predicted distribution of microbial taxa The fit of the neutral model to observed data averaged across individuals by habitat and disease state. The black points represent individual taxa, while the blue line represents the predicted values.
#Healthy Subgingival
dat <- subset(ncm.preds, Aim=='SA1' & Habitat_Class=='Sub')
B.2 <- as.data.frame(cbind(with(dat, aggregate(p, list(OTU), mean))[,2], with(dat, aggregate(freq, list(OTU), mean))[,2], with(dat, aggregate(OTU, list(OTU), function(x){as.character(unique(x))}))[,2]))
colnames(B.2) <- c('p.mean', 'freq.mean', 'OTU')
B.2$p.mean <- as.numeric(as.character(B.2$p.mean))
B.2$freq.mean <- as.numeric(as.character(B.2$freq.mean))
Bf.2 <- subset(B.2, OTU %in% tax.focus$OTU)
Bf.2 <- merge(Bf.2, tax.focus, by.x='OTU', by.y='OTU', all=FALSE)
m.fit <- with(B.2, nlsLM(freq.mean ~ pbeta(1/min.seq, min.seq*m*p.mean, min.seq*m*(1-p.mean), lower.tail=FALSE), start=list(m=0.1)))
fun.pred.2 <- function(x){pbeta(1/min.seq, min.seq*coef(m.fit)*x, min.seq*coef(m.fit)*(1-x), lower.tail=FALSE)}
plot.sa1_sub <- ggplot(B.2, aes(x=p.mean, y=freq.mean)) +
geom_point() +
geom_point(aes(x=p.mean, y=freq.mean), data=Bf.2, colour='white') +
geom_point(aes(x=p.mean, y=freq.mean, shape=code), data=Bf.2, colour='red', size=4) +
scale_shape_manual(values=as.numeric(as.character(Bf.2$code))) +
stat_function(data=B.2, aes(x=p.mean, y=freq.mean), fun=fun.pred.2, colour='#0072B2') +
scale_x_log10() +
ylab("Occurrence frequency") +
xlab("log(Mean Relative Abundance)") +
ggtitle("Subgingival communities across control subjects") +
coord_cartesian(ylim=c(-0.025,1.025)) +
theme_bw() +
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_blank(),
axis.line.x = element_line(color="black"),
axis.line.y = element_line(color="black"),
axis.title.x = element_text(colour='black'),
axis.text.x = element_text(colour='black'),
axis.title.y = element_text(colour='black'),
axis.text.y = element_text(colour='black'),
legend.position='none')
#Disease Supragingival
dat <- subset(ncm.preds, Aim=='SA3' & Habitat_Class=='Supra')
B.3 <- as.data.frame(cbind(with(dat, aggregate(p, list(OTU), mean))[,2], with(dat, aggregate(freq, list(OTU), mean))[,2], with(dat, aggregate(OTU, list(OTU), function(x){as.character(unique(x))}))[,2]))
colnames(B.3) <- c('p.mean', 'freq.mean', 'OTU')
B.3$p.mean <- as.numeric(as.character(B.3$p.mean))
B.3$freq.mean <- as.numeric(as.character(B.3$freq.mean))
Bf.3 <- subset(B.3, OTU %in% tax.focus$OTU)
Bf.3 <- merge(Bf.3, tax.focus, by.x='OTU', by.y='OTU', all=FALSE)
m.fit <- with(B.3, nlsLM(freq.mean ~ pbeta(1/min.seq, min.seq*m*p.mean, min.seq*m*(1-p.mean), lower.tail=FALSE), start=list(m=0.1)))
fun.pred.3 <- function(x){pbeta(1/min.seq, min.seq*coef(m.fit)*x, min.seq*coef(m.fit)*(1-x), lower.tail=FALSE)}
plot.sa3_supra <- ggplot(B.3, aes(x=p.mean, y=freq.mean)) +
geom_point() +
geom_point(aes(x=p.mean, y=freq.mean), data=Bf.3, colour='white') +
geom_point(aes(x=p.mean, y=freq.mean, shape=code), data=Bf.3, colour='red', size=4) +
scale_shape_manual(values=as.numeric(as.character(Bf.3$code))) +
stat_function(data=B.3, aes(x=p.mean, y=freq.mean), fun=fun.pred.3, colour='#0072B2') +
scale_x_log10() +
ylab("Occurrence frequency") +
xlab("log(Mean Relative Abundance)") +
ggtitle("Supragingival communities across Low Flow Cohort") +
coord_cartesian(ylim=c(-0.025,1.025)) +
theme_bw() +
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_blank(),
axis.line.x = element_line(color="black"),
axis.line.y = element_line(color="black"),
axis.title.x = element_text(colour='black'),
axis.text.x = element_text(colour='black'),
axis.title.y = element_text(colour='black'),
axis.text.y = element_text(colour='black'),
legend.position='none')
#Disease Subgingival
dat <- subset(ncm.preds, Aim=='SA3' & Habitat_Class=='Sub')
B.4 <- as.data.frame(cbind(with(dat, aggregate(p, list(OTU), mean))[,2], with(dat, aggregate(freq, list(OTU), mean))[,2], with(dat, aggregate(OTU, list(OTU), function(x){as.character(unique(x))}))[,2]))
colnames(B.4) <- c('p.mean', 'freq.mean', 'OTU')
B.4$p.mean <- as.numeric(as.character(B.4$p.mean))
B.4$freq.mean <- as.numeric(as.character(B.4$freq.mean))
Bf.4 <- subset(B.4, OTU %in% tax.focus$OTU)
Bf.4 <- merge(Bf.4, tax.focus, by.x='OTU', by.y='OTU', all=FALSE)
m.fit <- with(B.4, nlsLM(freq.mean ~ pbeta(1/min.seq, min.seq*m*p.mean, min.seq*m*(1-p.mean), lower.tail=FALSE), start=list(m=0.1)))
fun.pred.4 <- function(x){pbeta(1/min.seq, min.seq*coef(m.fit)*x, min.seq*coef(m.fit)*(1-x), lower.tail=FALSE)}
plot.sa3_sub <- ggplot(B.4, aes(x=p.mean, y=freq.mean)) +
geom_point() +
geom_point(aes(x=p.mean, y=freq.mean), data=Bf.4, colour='white') +
geom_point(aes(x=p.mean, y=freq.mean, shape=code), data=Bf.4, colour='red', size=4) +
scale_shape_manual(values=as.numeric(as.character(Bf.4$code))) +
stat_function(data=B.4, aes(x=p.mean, y=freq.mean), fun=fun.pred.4, colour='#0072B2') +
scale_x_log10() +
ylab("Occurrence frequency") +
xlab("log(Mean Relative Abundance)") +
ggtitle("Subgingival communities across Low Flow Cohort") +
coord_cartesian(ylim=c(-0.025,1.025)) +
theme_bw() +
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.border = element_blank(),
axis.line.x = element_line(color="black"),
axis.line.y = element_line(color="black"),
axis.title.x = element_text(colour='black'),
axis.text.x = element_text(colour='black'),
axis.title.y = element_text(colour='black'),
axis.text.y = element_text(colour='black'),
legend.position='none')
###Figure 3 Bonus: Observed and predicted distribution of microbial taxa for each individual metacommunity The following are plots just as above but not averaged across individual metacommunities.
#Part 3: Deviations from the neutral model We next wanted to investigate whether taxa deviated consistently from the predictions of the model across individuals, occurring either more or less frequently (i.e. being detected in a greater or lesser number of samples) than expected given their average abundance across samples. Taxa that consistently are found more frequently than expected given their average abundance may be under strong positive selection by the host environment or have particularly high dispersal rates, while the oppositie may be true for taxa found less frequently than expected given their average abundance.
For the following two plots, each open point represents the deviation/residuals from the model prediction for each taxa within each subject. Solid red points represent the mean while error bars represent the standard error. There is a dashed vertical line at zero to indicate the model prediction; points to the right of this line represent taxa that are found more frequently than expected, while points to the left of this line represent taxa that are found less frequently than expected.
###Figure X: Deviations from neutral model in SUPRAgingival habitats across subjects
###Figure X: Deviations from neutral model in SUBgingival habitats across subjects